/*
Coded 5/11/24 - Questions to terence.wood@anu.edu.au

This calls the individual do files that produce the analysis in the paper:
"Does independent review reduce the reported performance of aid projects?"

This do file:
1. Creates Table 1 - a table of differences between ongoing and final reports.
2. Creates Table 2 - a table of descriptive statistics.
3. Creates Figure 1 - a chart comparing numbers of ongoing and final reports.

IMPORTANT NOTES:
1. You will need Stata version 17 or 18 to use the collect and table commands
   as this code does.
2. If you want to run this code on its own, set the global macros used in the
   next section (data_loc, data_file and res_loc) to your own folder and data
   file names.
*/

********************************Preliminaries***********************************

// This section reads three global macros:
//   data_loc  - folder where the data are stored
//   data_file - name of the data file to open
//   res_loc   - folder where the analysis output is written
// If you are running this do file on its own, define them first so they point
// to the appropriate locations/file on your computer.

// Stop early with a clear message if any macro is undefined. Otherwise cd is
// called with an empty path before use fails on an empty file name.
if `"$data_loc"' == "" | `"$data_file"' == "" | `"$res_loc"' == "" {
    display as error "Define the globals data_loc, data_file and res_loc before running this do file."
    exit 198
}

cd "$data_loc"  // Change directory to location where data are stored.
use "$data_file", clear // Open data
cd "$res_loc" // Change directory to location where analysis will occur.


********************Table 1 compare final and ongoing reports*******************

preserve

// Prepare data
// Restrict the dataset to the variables relevant to this table.
keep id year pnencode region pacific country iso3 name reporttype treated ///
    covid effectiveness efficiency ee any_issue humanitarian earlyclose

// Exclude humanitarian projects (they are not used in this paper's analysis)
// and project assessments with data issues (these cannot reliably be compared
// over time).
drop if humanitarian == 1 | any_issue == 1

/* 
This section identifies each project's most recent ongoing performance score
so that it can be compared with the project's final performance score.
Reports with reporttype<3 are treated as ongoing; reporttype==3 is final.
*/

// Number the reports within each project, except for final reports.
// Same-year ties are broken by reporttype so that ongoing reports always
// precede a final report from the same year. Without a tie-breaker the
// within-year order is arbitrary and can differ between runs.
// NOTE(review): reports sharing project, year and reporttype are still
// ordered arbitrarily - confirm whether such duplicates exist.
bysort pnencode (year reporttype): gen pid = _n if reporttype<3
order pid, after(year)

// Average the performance score of all ongoing reports for each project.
// (Not used in the table below; kept for inspection.)
bysort pnencode: egen ongoing_avg = mean(ee) if reporttype<3

// Score of the last (most recent) ongoing report for each project.
bysort pnencode: egen mo = max(pid) if reporttype<3
// !missing(pid) is required: pid and mo are both missing on final reports,
// and Stata evaluates missing == missing as true.
gen last_ongoing = ee if pid == mo & !missing(pid)
drop mo

// Create difference variables (difference between final, and last ongoing).
gen finalscore = ee if reporttype==3
// Copy the preceding observation's last_ongoing onto each final report.
// pid is added to the sort key so ongoing reports keep the order in which
// they were numbered (final reports have missing pid and sort after them).
bysort pnencode (year reporttype pid): replace last_ongoing = last_ongoing[_n-1] if reporttype==3
keep if reporttype==3

gen diff_last = finalscore - last_ongoing
label var diff_last "Difference"

// Create table of summary statistics

collect clear

// Sample restrictions for the two tables:
//   a1 - all years pre 2019
//   a2 - same, but excluding 2016
local cond_a1 "year<2019"
local cond_a2 "year<2019 & year !=2016"

// Build one table per restriction, each stored in its own named collection.
foreach tbl in a1 a2 {
	table (var) () if `cond_`tbl'', nototal ///
		stat(mean diff_last) ///
		stat(median diff_last) ///
		stat(count diff_last) ///
		nformat(%10.2fc mean median) ///
		nformat(%10.0fc count) name(`tbl')
}

// Combine the two tables into a single collection
collect combine all = a1 a2
	
// Table title, set in bold
collect title "Table: Difference final and ongoing Reports"
collect style title, font(, bold)

// Replace the row label shown for variable diff_last
collect label levels var diff_last "Difference final report - last ongoing report", modify

// Column headings for the three statistics
collect label levels result mean "Mean" median "Median" count "N", modify

// Hide the unneeded row title
collect style header, title(hide)
// Rows: collection by variable; columns: statistics
collect layout (collection#var) (result)

// Display names for the two tables in the collection
collect label levels collection ///
    a1 "All years before 2019" ///
	a2 "Years before 2019 (excluding 2016)"

// Notes to place under table
collect notes 0: Notes: The table is based on calculations of the difference between ///
individual projects' final performance appraisals (those received upon completion) ///
and the last appraisal they received while ongoing. All data come from the pre-2019 ///
period (that is, before appraisal reviews commenced). The mean difference ///
across projects is displayed as well as the difference for the median project.	///
Because 2016 was an atypical year differences are calculated with 2016 included ///
and 2016 excluded.
collect style notes, font(calibri, size(9))

// Font for the table cells
collect style cell, font(calibri)

// Autofit column widths in the output Word document
collect style putdocx, layout(autofitcontents)

// Preview the modified table
collect preview

// Export table to Word.
// The file name contains spaces, so it is enclosed in double quotes, in line
// with the graph export command used for Figure 1.
collect export "1 Table 1 differences.docx", replace

restore


********************Table 2 - Table of summary statistics***********************
preserve

collect clear

// Continuous variables summarised in the table
local contvars ee lnbudget duration

// Assemble one stat() option per statistic, in display order
local contstats
foreach s in min max mean sd count {
	local contstats `contstats' stat(`s' `contvars')
}

// Build the table: summary statistics for the continuous variables and
// percentages for the categorical variables (non-humanitarian projects only)
quietly table (var) () if humanitarian==0, nototal ///
	`contstats' ///
	stat(fvpercent finalreport) ///
	stat(fvpercent sector) ///
	stat(fvpercent pacific) ///
	nformat(%10.2fc min max mean sd) nformat(%4.0fc fvpercent)

// Table title, set in bold
collect title "Table: Descriptive statistics"
collect style title, font(, bold)

// Category labels for the categorical variables
collect label levels sector ///
	1 "Agriculture" ///
	2 "Resilience" ///
	3 "Education" ///
	4 "Governance" ///
	5 "General" ///
	6 "Health" ///
	7 "Economic", modify
collect label levels pacific 0 "Elsewhere" 1 "Pacific", modify

// Row labels for variables
collect label levels var ///
	ee "Effectiveness & Efficiency" ///
	finalreport "Assessment type", modify

// Column headings for the statistics
collect label levels result ///
	min "Min" ///
	max "Max" ///
	sd "Std. Dvn." ///
	count "N" ///
	fvpercent "%", modify

// Append a percent sign to the categorical-variable percentages
collect style cell result[fvpercent], sformat("%s%%")

// Tidy presentation of categorical vars: "nobinder" tidies the category
// labels, "spacer" inserts a blank line
collect style row stack, nobinder spacer

// Font for the table cells
collect style cell, font(calibri)

// Autofit column widths in the output Word document
collect style putdocx, layout(autofitcontents)

// Preview the modified table
collect preview

// Export table to Word. The file name contains spaces, so it is enclosed in
// double quotes, in line with the graph export command used for Figure 1.
collect export "2 Table 2 Summary Stats.docx", replace


********Figure 1 - Chart showing number of appraisals by type and by year*******

// Indicator for ongoing reports: the complement of finalreport, left missing
// wherever finalreport is neither 0 nor 1.
gen ongoingreport = 1 - finalreport if inlist(finalreport, 0, 1)

// Bar chart of the yearly totals of ongoing and final reports
// (non-humanitarian projects only).
graph bar (sum) ongoingreport (sum) finalreport if humanitarian==0, ///
	over(year) ///
	ytitle("Projects") ///
	bar(1, fcolor(gs6) lcolor(gs6)) ///
	bar(2, fcolor(gs12) lcolor(gs6)) ///
	legend(label(1 "Ongoing assessment") label(2 "Final assessment") position(6) cols(2))

// The helper variable is no longer needed once the graph is drawn.
drop ongoingreport

// Save the chart as a high-resolution JPEG.
graph export "3 Figure 1 - Reports over time.jpg", width(10000) replace

// Clean up and return to the dataset as it was before preserve.
estimates clear
graph drop _all
restore